# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
from openpyxl import Workbook
from datetime import datetime
import re

class CcgpsaxPipeline(object):
    """Item pipeline that exports keyword-matching items to an Excel workbook.

    An item whose ``title`` contains at least one keyword from
    ``self.companys`` is appended to the worksheet as a
    ``[date, title, url]`` row, and the workbook is then saved under
    ``self.fileSaveName`` (relative to the current working directory).
    Every item, matching or not, is returned unchanged.
    """

    def __init__(self):
        """Create the workbook, write the header row and set up the filters."""
        self.wb = Workbook()
        self.ws = self.wb.active
        # Header row: publish date, organisation name, detail-page link.
        self.ws.append(['发布日期', '单位名称', '详情页链接'])
        # NOTE: not read anywhere in this class; the workbook is saved
        # relative to the current working directory, not under this path.
        self.savePath = '~/PycharmProjects/ccgpsax/'
        # Literal keywords; an item is kept when its title contains any one.
        self.companys = [
            u'法律顾问',
            u'中介咨询机构',
            u'PPP'
        ]
        # Computed once per pipeline instance. The name has hour resolution,
        # so recomputing it on every save would make a crawl that crosses an
        # hour boundary switch to a second file containing the same rows.
        self.fileSaveName = (
            'ccgp-shaanxi招标信息_'
            + datetime.now().strftime('%Y%m%d_%H')
            + '.xlsx'
        )

    def _matches(self, title):
        """Return True if *title* contains at least one configured keyword."""
        # Plain substring test: the keywords are literal text, so they must
        # not be interpreted as regular-expression patterns.
        return any(keyword in title for keyword in self.companys)

    def process_item(self, item, spider):
        """Write *item* to the workbook when its title matches a keyword.

        Args:
            item: mapping that provides the ``date``, ``title`` and ``url``
                keys.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item`` object, unchanged.
        """
        if self._matches(item['title']):
            self.ws.append([item['date'], item['title'], item['url']])
            # Saved after every accepted row, so the file on disk always
            # contains all rows accepted so far.
            self.wb.save(self.fileSaveName)
        return item
